rm(list = ls()) # remove all the object before starting
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(phyloseq)
library(speedyseq)
library(ggrepel)
library(ampvis2)
library(plotly)
library(zCompositions)
options(getClass.msg=FALSE) # https://github.com/epurdom/clusterExperiment/issues/66

#this fixes an error message that pops up because the class 'Annotated' is defined in two different packages

The package is in progress but we do not actually need one, we can directly source the functions from github - internet connection needed:

source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_taxa_tests.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_normalisation.R")
## Loading required package: scales
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_alpha.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_beta.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_heatmap.R")

Load data nicely (using here::here()):

ps = "data/processed/physeq_update_23_11.RDS"

ps %>% 
  here::here() %>%
  readRDS() %>%
  phyloseq_get_strains_fast() %>%
  phyloseq_remove_chloro_mitho() -> physeq
## Joining, by = "ASV"

Extract enriched data and add Block variable:

physeq %>%
  subset_samples(Experiment == "Continuous" &
                   Enrichment == "Enriched" ) %>%
  rarefy_even_depth(sample.size = 2574,rngseed = 123) -> physeq_enriched

sample_data(physeq_enriched)$Block = ifelse(sample_data(physeq_enriched)$Treatment %in% c("CTX", "CTX+HV292.1", "HV292.1"), "Exp CTX", "Exp Van")

Heatmaps to visualize ASV level community: Mean values first

physeq_enriched %>%
  phyloseq_ampvis_heatmap(transform = "percent",
                          group_by = "Reactor_Treatment",
                          facet_by = c("Reactor_Treatment","Enrichment", "Block", "Reactor", "Treatment", "Experiment" ),
                          tax_aggregate = "Species",
                          tax_add = NULL,
                          ntax  = 600) -> p
## Warning: There are only 162 taxa, showing all
p + facet_grid( ~ Block + Reactor_Treatment , scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 50, 75, 100), 
                       labels = c(0,  0.01, 1, 10, 50, 75, 100), 
                       trans = scales::pseudo_log_trans(sigma = 0.001),
                       na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

all data.

physeq_enriched %>%
  phyloseq_ampvis_heatmap(transform = "percent",
                          group_by = "Day_of_Treatment",
                          facet_by = c("Reactor_Treatment","Enrichment", "Block", "Reactor", "Treatment", "Experiment" ),
                          tax_aggregate = "Species",
                          tax_add = NULL,
                          ntax  = 600) -> p
## Warning: There are only 162 taxa, showing all
p + facet_grid( ~ Block + Reactor_Treatment, scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 50, 75, 100), 
                       labels = c(0,  0.01, 1, 10, 50, 75, 100), 
                       trans = scales::pseudo_log_trans(sigma = 0.001),
                       na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

Let’s consider presence absence: what is not present in control but not in other ones.

Define a function:

venn_enrichment <- function(physeq_enrichment,
                              physeq_reactor,
                              venn_column,
                              cut_a = 0,
                              cut_f = 100,
                              groups = c("TR6", "TR5", "TR5_TR6"),
                              group = "Day_of_Treatment",
                              facet_by = c("Reactor_Treatment","Enrichment", "Reactor", "Treatment", "Experiment"),
                              ntax = 600)
{
  
  require(UpSetR); require(MicrobiotaProcess)
  physeq_enrichment -> tmp2
  tax_table(physeq_enrichment) <- tax_table(physeq_enrichment)[,c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Strain")]
  colnames(tax_table(physeq_enrichment)) = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
  
  physeq_enrichment %>%
    phyloseq_to_ampvis2() %>%
    amp_venn(group_by = venn_column, cut_a = cut_a, cut_f = cut_f, normalise = FALSE, detailed_output = TRUE) -> venn_p
  
  physeq_enrichment %>%
    MicrobiotaProcess::get_upset(factorNames = venn_column) %>%
    UpSetR::upset(order.by = "freq", empty.intersections = "on") -> up #https://www.rdocumentation.org/packages/ggupset/versions/0.3.0
  
  
  venn_p$Otutable %>%
    dplyr::select(-Kingdom:-Genus) %>%
    dplyr::filter(Shared %in% !!groups) -> df
  
  df %>%
    dplyr::pull(OTU) -> ASV
  
  
  prune_taxa(ASV,
             tmp2) %>%
    phyloseq_ampvis_heatmap(transform = "none",
                            group_by = group,
                            facet_by = facet_by,
                            tax_aggregate = "Species",
                            tax_add = NULL,
                            ntax  = ntax) +
    scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 50, 75, 100),
                         labels = c(0,  0.01, 1, 10, 50, 75, 100),
                         trans = scales::pseudo_log_trans(sigma = 0.001),
                         na.value = 'transparent') -> p
  
  #
  physeq_reactor %>%
    transform_sample_counts(function(x) x/sum(x) * 100) %>%
    plot_taxa_abundances_over_time(taxa_level = "Strain",
                                   taxa_to_plot = df %>% pull("Species"),
                                   time_column = group,
                                   axis_transform = FALSE,
                                   transformation = FALSE,
                                   data_facet1 = "Reactor") -> p2
  
  return(list("venn_plot" = venn_p$plot + ggtitle(paste0("cut_a: ", cut_a, " cut_f: ", cut_f)),
              "upset_plot" = up,
              "filter_df" = df,
              "plot_enrichment" = p,
              "plot_reactor" = p2$plot + ylab("Proportion (%)") + theme_light()
  ))
}

global function

venn_enrichment_all <- function(physeq_enrichment,
                              physeq_reactor,
                              venn_column,
                              cut_a = 0,
                              cut_f = 100,
                              group = "Day_of_Treatment",
                              facet_by = c("Reactor_Treatment","Enrichment", "Reactor", "Treatment", "Experiment"),
                              ntax = 600)
{
  
  require(UpSetR); require(MicrobiotaProcess)
  
  venn_enrichment(physeq_enrichment = physeq_enriched %>%
                    subset_samples(Block == "Exp CTX") %>%
                    subset_samples(Reactor %in% c("CR", "TR1", "TR3")) %>%
                    transform_sample_counts(function(x) x/sum(x) * 100) ,
                  physeq_reactor = physeq %>%
                    subset_samples(Experiment == "Continuous" &
                                     Enrichment == "NotEnriched") %>%
                    subset_samples(Reactor %in% c("CR", "TR1", "TR3")) %>%
                    rarefy_even_depth(sample.size = 2574,rngseed = 123),
                  venn_column = "Reactor",
                  cut_a = cut_a,
                  cut_f = cut_f,
                  groups = c("TR1", "TR3", "TR1_TR3")) -> CTX
  
  
  venn_enrichment(physeq_enrichment = physeq_enriched %>%
                    subset_samples(Block == "Exp Van") %>%
                    subset_samples(Reactor %in% c("CR", "TR5", "TR6")) %>%
                    transform_sample_counts(function(x) x/sum(x) * 100) ,
                  physeq_reactor = physeq %>%
                    subset_samples(Experiment == "Continuous" &
                                     Enrichment == "NotEnriched") %>%
                    subset_samples(Reactor %in% c("CR", "TR5", "TR6")) %>%
                    rarefy_even_depth(sample.size = 2574,rngseed = 123),
                  venn_column = "Reactor",
                  cut_a = cut_a,
                  cut_f = cut_f,
                  groups = c("TR6", "TR5", "TR5_TR6")) -> VAN
  
  return(list("CTX" = CTX,
              "VAN" = VAN
  ))
}
venn_enrichment_all() -> a
## Warning: There are only 27 taxa, showing all
## Warning: There are only 9 taxa, showing all
venn_enrichment_all(cut_a = 1,
                    cut_f = 90) -> b
## Warning: There are only 5 taxa, showing all
## Warning: There are only 3 taxa, showing all
venn_enrichment_all(cut_a = 0.1,
                    cut_f = 80) -> c
## Warning: There are only 14 taxa, showing all
## Warning: There are only 9 taxa, showing all
venn_enrichment_all(cut_a = 0,
                    cut_f = 80) -> d
## Warning: There are only 26 taxa, showing all
## Warning: There are only 10 taxa, showing all

Summary:

VAN:

ggpubr::ggarrange(a$VAN$venn_plot,
                  b$VAN$venn_plot,
                  c$VAN$venn_plot,
                  d$VAN$venn_plot)

ggpubr::ggarrange(a$VAN$plot_enrichment,
                  b$VAN$plot_enrichment,
                  c$VAN$plot_enrichment,
                  d$VAN$plot_enrichment,
                  align = "hv",
                  common.legend = TRUE)

ggpubr::ggarrange(a$VAN$plot_reactor + scale_y_sqrt(),
                  b$VAN$plot_reactor + scale_y_sqrt(),
                  c$VAN$plot_reactor + scale_y_sqrt(),
                  d$VAN$plot_reactor + scale_y_sqrt(),
                  legend = "bottom",
                  common.legend = TRUE)

CTX:

ggpubr::ggarrange(a$CTX$venn_plot,
                  b$CTX$venn_plot,
                  c$CTX$venn_plot,
                  d$CTX$venn_plot)

ggpubr::ggarrange(a$CTX$plot_enrichment,
                  b$CTX$plot_enrichment,
                  c$CTX$plot_enrichment,
                  d$CTX$plot_enrichment,
                  align = "hv",
                  common.legend = TRUE)

ggpubr::ggarrange(a$CTX$plot_reactor + scale_y_sqrt(),
                  b$CTX$plot_reactor + scale_y_sqrt(),
                  c$CTX$plot_reactor + scale_y_sqrt(),
                  d$CTX$plot_reactor + scale_y_sqrt(),
                  legend = "bottom",
                  common.legend = TRUE)

c$CTX$plot_reactor   + scale_y_log10() + facet_grid(Reactor ~ Data) -> p 

p  + theme(legend.position="none")
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

ggpubr::get_legend(p) %>% ggpubr::as_ggplot()
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

c$VAN$plot_reactor   + scale_y_log10() + facet_grid(Reactor ~ Data) -> p 

p  + theme(legend.position="none")
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

ggpubr::get_legend(p) %>% ggpubr::as_ggplot()
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis